R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

# The gender wage gap by profession

# Load the wage-gap data set and preview its first rows.
wageGap <- read.csv(file = "/Users/rohithpanjala/Downloads/Wage_gap.csv")
head(wageGap, n = 6L)
##         Profession Women_weekly_earnings Women_earnings_wrt_men
## 1 Chief executives                  2019                   80.5
## 2         Surgeons                  3200                   87.3
## 3 General managers                  1153                   78.1
## 4      Programmers                  1718                   89.5
## 5           Lawyer                  1317                   76.3
## 6          Nusring                   556                   94.6
##   Men_weekly_earnings
## 1                2509
## 2                3665
## 3                1476
## 4                1920
## 5                1725
## 6                 588
library(ggplot2)

# Dot plot of men's and women's weekly earnings for each profession, with the
# professions ordered by women's earnings as a percentage of men's.

# Drop incomplete rows so no missing values reach the plot.
wageGap <- na.omit(wageGap)
# Earnings-only subset (not used by this plot).
wageGap1 <- wageGap[
  , c("Profession", "Women_weekly_earnings", "Men_weekly_earnings")
]

wageGap1_sorted <- wageGap[order(wageGap$Women_earnings_wrt_men), ]
# reorder() stores the ordering in the factor levels, which is what ggplot
# uses to lay out a discrete x axis.
wageGap1_sorted$profession <- reorder(
  wageGap1_sorted$Profession,
  wageGap1_sorted$Women_earnings_wrt_men
)

# Colour is mapped inside aes() so that a legend identifies each series; the
# manual scale keeps the original red (men) / blue (women) colours.
plot_Wag <- ggplot(wageGap1_sorted, aes(x = profession)) +
  geom_point(aes(y = Men_weekly_earnings, color = "Men earnings")) +
  geom_point(aes(y = Women_weekly_earnings, color = "Women earnings")) +
  scale_color_manual(
    name = NULL,
    values = c("Men earnings" = "red", "Women earnings" = "blue")
  ) +
  labs(
    x = "Profession",
    y = "Weekly earnings",
    title = "Profession wise gender gap "
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(plot_Wag)
# Bar chart of weekly earnings per profession, ordered by women's earnings as
# a percentage of men's (ties broken by men's earnings, highest first).
wageGap1_sorted <- arrange(
  wageGap,
  Women_earnings_wrt_men,
  desc(Men_weekly_earnings)
)
# Sorting the rows alone does not change the axis: ggplot orders a character
# x variable alphabetically. Encode the order in the factor levels instead.
wageGap1_sorted$Profession <- reorder(
  wageGap1_sorted$Profession,
  wageGap1_sorted$Women_earnings_wrt_men
)

# The two layers are overlaid, with the women's bar drawn in front of the
# men's bar. NOTE(review): a men's bar shorter than the women's bar would be
# hidden behind it -- confirm this cannot happen in the data.
wage_plot <- ggplot(wageGap1_sorted, aes(x = Profession)) +
  geom_bar(
    aes(y = Men_weekly_earnings, fill = "Men earnings"),
    stat = "identity"
  ) +
  geom_bar(
    aes(y = Women_weekly_earnings, fill = "Women earnings"),
    stat = "identity"
  ) +
  scale_fill_manual(
    values = c("Men earnings" = "blue", "Women earnings" = "skyblue")
  ) +
  labs(
    x = "Profession",
    y = "Weekly earnings",
    title = "Stacked bar graph for wage gap"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(wage_plot)
# Shaded line chart of men's and women's weekly earnings by profession.
wageGap <- read.csv("/Users/rohithpanjala/Downloads/Wage_gap.csv")
# Keep only the plotted columns and drop incomplete rows; a row with missing
# earnings otherwise triggers "no non-missing arguments to max" and
# "Removed 1 row containing missing values" warnings when the plot is drawn.
wageGap1 <- na.omit(
  wageGap[, c("Profession", "Women_weekly_earnings", "Men_weekly_earnings")]
)

# Create the plot.
# `group = 1` is set for every layer: with a discrete x axis each profession
# is otherwise its own one-row group, and a ribbon or line needs at least two
# points in a group to be drawn.
ggplot(wageGap1, aes(x = Profession, group = 1)) +
  geom_ribbon(
    aes(ymin = 0, ymax = Men_weekly_earnings, fill = "Men earnings"),
    alpha = 0.5
  ) +
  geom_ribbon(
    aes(ymin = 0, ymax = Women_weekly_earnings, fill = "Women earnings"),
    alpha = 0.5
  ) +
  geom_line(aes(y = Men_weekly_earnings, color = "Men earnings")) +
  geom_line(aes(y = Women_weekly_earnings, color = "Women earnings")) +
  # Rotate only the profession labels on the x axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # Named values pin each colour to its series regardless of level order.
  scale_color_manual(
    values = c("Men earnings" = "blue", "Women earnings" = "brown")
  ) +
  scale_fill_grey(name = NULL, guide = "none") +
  labs(
    title = "Line Graph with Different Color Shades for Two Lines",
    x = "Profession",
    y = "Weekly earnings",
    color = "Color"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Removed 1 row containing missing values (`geom_line()`).

# State-wise US crime rate in 2019

# Single source of truth for the year: used for both the row filter and the
# plot title so the two cannot drift apart.
crime_year <- 2019

crime_rate <- read.csv("/Users/rohithpanjala/Downloads/state_crime1.csv")
# %in% never yields NA, so rows with a missing year are dropped instead of
# turning into all-NA rows.
crime_rate <- crime_rate[crime_rate$year %in% crime_year, ]
# NOTE(review): assumes the CSV has exactly these eight columns in this
# order -- confirm against the file.
colnames(crime_rate) <- c(
  "state", "year", "population", "total_crimes",
  "Assault", "Murder", "Molest", "Robbery"
)
# Drop the national aggregate so only individual states are plotted.
crime_rate <- crime_rate[crime_rate$state != "United States", ]

# Reshape to one row per state and crime type. Drawing the four crime types
# as separate bar layers starts every bar at zero, so later layers cover
# earlier ones; a single layer on long data stacks them instead.
crime_long <- tidyr::pivot_longer(
  crime_rate,
  cols = c("Assault", "Murder", "Molest", "Robbery"),
  names_to = "crime_type",
  values_to = "cases"
)

a <- ggplot(crime_long, aes(x = state, y = cases, fill = crime_type)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    name = "Crime Type",
    values = c(
      "Assault" = "blue",
      "Murder" = "red",
      "Molest" = "green",
      "Robbery" = "orange"
    )
  ) +
  xlab("State") +
  ylab("Number of crime cases") +
  ggtitle(paste("State-Wise crime in US in", crime_year)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(a)

# Indian startup company statistics in 2021

# Interactive bar chart of numberOfCompanies per Headquarters value, with the
# bars ordered by, and shaded according to, that same count.
Startup <- read.csv(file = "/Users/rohithpanjala/Downloads/Startup.csv")
xy <- ggplot(
  data = Startup,
  mapping = aes(
    x = reorder(Headquarters, numberOfCompanies),
    y = numberOfCompanies,
    fill = numberOfCompanies
  )
) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "skyblue", high = "purple") +
  xlab("Cities") +
  ylab("Number of companies") +
  ggtitle("Bar graph representing number of companies founded in each city") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
ggplotly(xy)
library(tidyverse) 
library(ggplot2)

Startup_Count <- read.csv("/Users/rohithpanjala/Downloads/Startup_stat2.csv")

# Area plot of Startup_count by Year, with one axis break per year in the
# data. The fill is a fixed colour: an area is drawn as one polygon per
# group, so a fill mapped to a continuous variable cannot vary along it and
# a gradient scale would have no visible effect.
startup <- ggplot(Startup_Count, aes(x = Year, y = Startup_count)) +
  geom_area(fill = "darkgreen") +
  scale_x_continuous(breaks = Startup_Count$Year) +
  labs(
    x = "Year",
    y = "Startup count",
    title = "Area plot for Startups added each year"
  )

startup